import syssys.path.append('../../')import BasicLibraries as BLfrom statsmodels.discrete.discrete_model import Probitimport statsmodels.api as smimport global_variables as GVfrom matplotlib.patches import Patchimport warnings, tracebackfrom statsmodels.tools.sm_exceptions import ConvergenceWarningwarnings.simplefilter('ignore', ConvergenceWarning)import warningswarnings.filterwarnings("ignore", category=FutureWarning)warnings.filterwarnings("ignore", category=RuntimeWarning)'''This file includes all the regression specifications and the function to estimate the inverse Mills ratio to adjust for self-selectivity'''################################################################################################################marker_type, line_style, index_dict, abb_index_dict, inverse_dict, STcontrols = GV.global_vars()#################################################################################################################======= Set as True for the robustness test (Table S11)add_initiatives_robustness = Falseadd_goals_robustness = False#=======#%%class Analysis():    def __init__(self):        pass        def get_initiatives_control(self):        cdpINI = BL.pd.read_csv('local_data/CDP_initiatives_count.csv')        cdpINI['mrg'] = cdpINI['gvkey'].astype(str)+'-'+cdpINI['Year'].astype(str)        return cdpINI[['mrg', 'Activity_type']]     def get_goals_controls(self):        cdpGLS = BL.pd.read_csv('local_data/CDP_initiatives_goals.csv')            return cdpGLS[['mrg', 'number_of_goals']]        def filter_outlier(self, X, qtl=0.99, var_to_filter = ['firm_size', 'investment', 'Tangibility',  'Profitability','emission']):        X[var_to_filter] = X[var_to_filter][X[var_to_filter] < X[var_to_filter].quantile(qtl)]        X[var_to_filter] = X[var_to_filter][X[var_to_filter] > X[var_to_filter].quantile(1-qtl)]        return X    def significance(self, X): return(['***' if X < 0.01 else '**' if X < 0.05 else '*' if X < 0.1 else ''][0])    def Heckman_step(self,  emissions, cdp_firms_copy, non_cdp_firms_copy, emission_type = 'DirectControl'):        cdp_firms_copy.insert(1, 'is_cdp', [1]*len(cdp_firms_copy))        non_cdp_firms_copy.insert(0, 'is_cdp', [0]*len(non_cdp_firms_copy))        firms = BL.pd.concat((cdp_firms_copy, non_cdp_firms_copy))        #=== probability of disclosure in the sector        disclosure_prob = firms[['GICS_level_1', 'fyear', 'is_cdp']].groupby(['GICS_level_1', 'fyear']).mean().reset_index()        disclosure_prob['mrg'] = disclosure_prob['GICS_level_1']+'-'+disclosure_prob['fyear'].astype(int).astype(str)        disclosure_prob = disclosure_prob.rename(columns = {'is_cdp': 'disclosure_prob'})        firms.loc[:,'mrg'] = firms['GICS_level_1']+'-'+firms['fyear'].astype(int).astype(str)        firms = firms.merge(disclosure_prob[['disclosure_prob', 'mrg']])        firms.loc[:,'mrg'] = firms['gvkey'].astype(str)+'-'+firms['fyear'].astype(int).astype(str)        #=== Past emissions        emissions = emissions.sort_values(by = ['gvkey', 'cyear'])        emissions.loc[:,'lag1'] = emissions[['gvkey',  emission_type]].groupby('gvkey').shift(1).values.tolist()        emissions.loc[:,'lag2'] = emissions[['gvkey',  emission_type]].groupby('gvkey').shift(2).values.tolist()        emissions.loc[:,'past_emissions'] = emissions[['lag1', 'lag2']].sum(axis = 1).apply(BL.np.log).replace([-BL.np.inf, BL.np.inf], [BL.np.nan, BL.np.nan]).values.tolist()        firms = firms.merge(emissions[['past_emissions', 'mrg']])        #===        firms = firms[firms.fyear > 2011]        firms.loc[:,'investment'] = firms.Total_invested_capital_BS1.apply(BL.np.log)        firms = firms[['is_cdp', 'past_emissions', 'disclosure_prob', 'firm_size', 'investment', 'Tangibility', 'mrg']].replace([-BL.np.inf, BL.np.inf], [BL.np.nan, BL.np.nan]).dropna().set_index('mrg')        Y = firms['is_cdp']        X = firms[firms.columns[1:]]                pb = Probit(Y, X)        pb_out = pb.fit(disp=0)        ### Now get the estimated probability and the inverse Mills ratio        estimated_prob = pb.predict(pb_out.params, X)        IMR = pb.pdf(BL.scale(estimated_prob))/pb.cdf(BL.scale(estimated_prob))        IMR = BL.pd.DataFrame(IMR, index = X.index).reset_index()        IMR.columns = ['mrg', 'IMR']        return IMR        #######################################################    ################### Characteristics ###################    #######################################################        def get_explanators_dt(self, cdp_t, ref_t, st, comp, sec, reg, ref, trucost,  other_esg_tmp, cdp_firms, non_cdp_firms,robustness = False, ratio = 1):        intensity_scale =  'Total_invested_capital_BS1'        cdp_firms_copy = cdp_firms.copy()        non_cdp_firms_copy = non_cdp_firms.copy()        emission_type ='DirectControl'        #=== system thinking by year        st = st[['Pred_label'] + STcontrols + ['gvkey', 'Year']].groupby(['gvkey', 'Year']).mean().reset_index()        st.columns = ['gvkey', 'Year', 'system_thinking']+STcontrols        st.loc[:,'text_length'] = st['text_length'].apply(BL.np.log)        st.loc[:,'mrg'] = st['gvkey'].astype(int).astype(str)+'-'+st['Year'].astype(int).astype(str)        #=== fundamentals        comp = comp[(comp.fyear <= 2020) & (comp.fyear >= 2012) ][['firm_size', 'MTB', 'MarketLeverage', intensity_scale,                                                                     'Tangibility', 'Profitability',  'gvkey',  'fyear']]        comp.loc[:,'mrg']= comp['gvkey'].astype(int).astype(str)+'-'+comp['fyear'].astype(int).astype(str)        #=== Emissions         emissions = trucost[[emission_type, 'cyear', 'gvkey', 'mrg']].copy()        emissions = emissions.sort_values(by = ['gvkey', 'cyear'])        emissions.loc[:,'lag1'] = emissions[['gvkey', emission_type]].groupby('gvkey').shift(1).values.ravel().tolist()        emissions.loc[:,'lag2'] = emissions[['gvkey', emission_type]].groupby('gvkey').shift(2).values.ravel().tolist()        emissions.loc[:, emission_type] = emissions[['lag1', 'lag2']].sum(axis = 1).values.tolist()        emissions = emissions[emissions[emission_type] > 0]        #=== merge them        g = st[['system_thinking']+STcontrols +['mrg']].merge(comp).\            merge(reg.reset_index()).merge(sec.reset_index()).merge(emissions)        g = g[g[intensity_scale] > 0]        g.loc[:,'investment'] = g.Total_invested_capital_BS1.apply(BL.np.log)        g.loc[:,'emission_intensity'] = (g['DirectControl']/g[intensity_scale]).apply(BL.np.log)         g = g.replace(BL.np.inf, BL.np.nan)        heck = self.Heckman_step(emissions, cdp_firms_copy, non_cdp_firms_copy)        g = g.merge(heck, on = 'mrg')        g = self.filter_outlier(g, var_to_filter=['firm_size', 'investment', 'Tangibility',  'Profitability', 'MTB']+STcontrols)        g = g.dropna()        if robustness:            print('robustness test')            sample = BL.np.random.choice(g.gvkey.unique(), int(ratio*len(g.gvkey.unique())), replace = False)            g = g[g.gvkey.isin(sample)]        ############        dummies = []        to_drop=[]        for i in list(sec.columns) +  list(reg.columns):            if g[i].sum() == 0:                to_drop.append(i)            else:                dummies.append(i)        g = g.drop(columns = to_drop)        g[['emission_intensity', 'firm_size','MTB', 'investment', 'Tangibility', 'Profitability', 'IMR' ]] = \            g[['emission_intensity', 'firm_size', 'MTB','investment', 'Tangibility', 'Profitability', 'IMR' ]].apply(BL.scale)        return g, dummies    def Explanators(self, g, dummies):        expl = BL.pd.DataFrame()        X = g[['firm_size', 'investment','MTB', 'MarketLeverage', 'Tangibility', 'Profitability', 'IMR']+STcontrols +\              dummies].copy()        Y = g['system_thinking']        lm = sm.RLM(Y, X, M=sm.robust.norms.HuberT()).fit(cov = 'H2')        standardise_params = lm.params*X.std()/Y.std()        results_ = BL.pd.concat((standardise_params, lm.pvalues), axis = 1)        results_ = results_.round(3)        results_.columns = ['coefficient', 'p-value']        idx = results_.index        results_ = BL.pd.DataFrame([str(results_['coefficient'].iloc[i])+self.significance(results_['p-value'].iloc[i]) for i in range(len(results_))], index = list(idx), columns = ['effect'])        results_.drop(index = dummies+['IMR'], inplace=True)        expl = results_        sgn_feature = []        for i in range(len(expl)):            if '*' in expl.iloc[i].values[0]:                sgn_feature.append(expl.index[i])               expl = expl.rename(index = index_dict)        return g, expl, sgn_feature        ############################################    ################# Policies #################    ############################################        def get_mediators_dt(self, cdp_t, ref_t, st, comp, sec, reg, ref, trucost, explanators,  other_esg_tmp, marginal_=False, robustness = False, ratio = 1):        intensity_scale =  'Total_invested_capital_BS1'        other_esg = other_esg_tmp.copy()        #=== Other ESG        ref = ref[['mrg']+other_esg+['Is_target']]        #=== system thinking by year        st = st[['Pred_label']+STcontrols+ ['gvkey', 'Year']].groupby(['gvkey', 'Year']).mean().reset_index()        st.columns = ['gvkey', 'Year', 'system_thinking']+STcontrols        st.loc[:,'text_length'] = st['text_length'].apply(BL.np.log)        st.loc[:,'mrg'] = st['gvkey'].astype(int).astype(str)+'-'+st['Year'].astype(int).astype(str)        #=== fundamentals        comp = comp[(comp.fyear <= 2020) & (comp.fyear >= 2012) ][['firm_size', 'MTB',intensity_scale, 'MarketLeverage',                                                                     'Tangibility', 'Profitability',  'gvkey',  'fyear']]        comp.loc[:,'mrg']= comp['gvkey'].astype(int).astype(str)+'-'+comp['fyear'].astype(int).astype(str)        #=== Emissions (SettingWithCopyWarning, why?)        emissions = trucost[['DirectControl', 'mrg']].copy()        emissions.loc[:,'DirectControl'] = emissions['DirectControl'].apply(BL.np.log).values.ravel().tolist()        #=== merge them        g = st[['system_thinking']+STcontrols+[ 'mrg']].merge(comp).\            merge(reg.reset_index()).merge(sec.reset_index()).merge(ref.reset_index()).merge(emissions)        g = g[g[intensity_scale] > 0]        g.loc[:,'investment'] = g.Total_invested_capital_BS1.apply(BL.np.log)        g.loc[:,'emission_intensity'] = (g['DirectControl']/g[intensity_scale]).apply(BL.np.log) #/g['firm_size']        g = g.replace(BL.np.inf, BL.np.nan)        g = self.filter_outlier(g, var_to_filter = explanators)        g = g.dropna()                if robustness:            print('robustness test')            sample = BL.np.random.choice(g.gvkey.unique(), int(ratio*len(g.gvkey.unique())), replace = False)            g = g[g.gvkey.isin(sample)]        ############        dummies = []        to_drop=[]        for i in  list(reg.columns):            if g[i].sum() == 0:                to_drop.append(i)            else:                dummies.append(i)        g = g.drop(columns = to_drop)        return g, explanators, other_esg, dummies    def mediators(self, g, explanators, other_esg,  dummies):        med_efx = BL.pd.DataFrame()        for mediators in other_esg+['Is_target']:            X = g[explanators+[ 'emission_intensity', 'system_thinking']+\                  dummies].copy()            Y = g[mediators]            pr = Probit(Y, X).fit(disp=0)            #=== Standardisation from https://esajournals.onlinelibrary.wiley.com/doi/pdf/10.1002/ecs2.2283            std_Y =pr.predict(X).round().std()/BL.np.sqrt(pr.prsquared)            standardise_params = pr.params*X.std()/std_Y            results_ = BL.pd.concat((standardise_params.round(2), pr.pvalues.round(3)), axis = 1)            results_ = results_.loc['system_thinking']            results_ = BL.pd.DataFrame(results_).transpose().round(3)            results_.columns = ['coefficient', 'p-value']            idx = results_.index            results_ = BL.pd.DataFrame([str(results_['coefficient'].iloc[i])+self.significance(results_['p-value'].iloc[i]) for i in range(len(results_))], index = list(idx), columns = ['effect'])            med_efx = BL.pd.concat((med_efx, results_), axis  =1)        med_efx = med_efx.replace([BL.np.nan], [''])        med_efx.columns = other_esg+['Is_target']        med_efx = med_efx.transpose()        sgn_feature = []        for i in range(len(med_efx)):            if '*' in med_efx.iloc[i].values[0]:                sgn_feature.append(med_efx.index[i])        med_efx = med_efx.rename(columns = index_dict)                return g, med_efx, sgn_feature    #######################################################    ###################### Emissions ######################    #######################################################        def get_emissions_dt(self, cdp_t, ref_t, st, comp, sec, reg, ref, trucost, explanators, other_esg_tmp,   cdp_firms, non_cdp_firms, robustness=False, ratio = 1):        intensity_scale =  'Total_invested_capital_BS1'        cdp_firms_copy = cdp_firms.copy()        non_cdp_firms_copy = non_cdp_firms.copy()        emission_type =  'DirectControl'        other_esg = other_esg_tmp.copy()        #=== Internal targets        z = ref_t[['Is_target', 'mrg', 'gvkey', 'fyear']].merge(cdp_t[['Is_target', 'mrg']], on = 'mrg')        z = z.rename(columns = {'Is_target_x': 'is_target',                                'Is_target_y': 'target_type'})        #=== Other ESG        ref = ref[['mrg']+other_esg].copy()        #=== system thinking by year        st = st[['Pred_label']+STcontrols+['gvkey', 'Year']].groupby(['gvkey', 'Year']).mean()        st.columns = ['system_thinking']+STcontrols        st.loc[:,'text_length'] = st['text_length'].apply(BL.np.log)        st = st.reset_index()        st.loc[:,'mrg'] = st['gvkey'].astype(int).astype(str)+'-'+st['Year'].astype(int).astype(str)        #=== fundamentals        comp = comp[(comp.fyear <= 2020) & (comp.fyear >= 2012) ][['firm_size', 'MTB', 'MarketLeverage',intensity_scale, 'Tangibility',                                                                    'Profitability', 'gvkey',  'fyear']].copy()        comp.loc[:,'mrg']= comp['gvkey'].astype(int).astype(str)+'-'+comp['fyear'].astype(int).astype(str)        #=== Emissions        emissions = trucost[[emission_type, 'cyear', 'gvkey', 'mrg']].copy()        emissions = emissions.sort_values(by = ['gvkey', 'cyear'])        emissions.loc[:,'lag1'] = emissions[['gvkey', emission_type]].groupby('gvkey').shift(-1).values.ravel().tolist()        emissions.loc[:,'lag2'] = emissions[['gvkey', emission_type]].groupby('gvkey').shift(-2).values.ravel().tolist()        emissions.loc[:, emission_type] = emissions[['lag1', 'lag2']].sum(axis = 1, skipna=False).values.tolist()        emissions = emissions[emissions[emission_type] > 0]        #=== Merge them        g = st[['system_thinking']+STcontrols+['mrg']].merge(comp).\            merge(reg.reset_index()).merge(sec.reset_index()).merge(ref.reset_index()).merge(emissions)        g = g[g[intensity_scale] > 0]        g.loc[:,'investment'] = g.Total_invested_capital_BS1.apply(BL.np.log)        g.loc[:,'emission'] = (g[emission_type]/g[intensity_scale]).apply(BL.np.log)        g = g.replace(BL.np.inf, BL.np.nan)        heck = self.Heckman_step(emissions, cdp_firms_copy, non_cdp_firms_copy, emission_type = emission_type)        g = g.merge(heck, on = 'mrg')        g = self.filter_outlier(g, var_to_filter = explanators)        if robustness:            print('robustness test')            sample = BL.np.random.choice(g.gvkey.unique(), int(ratio*len(g.gvkey.unique())), replace = False)            g = g[g.gvkey.isin(sample)]        g = g.dropna()        years = BL.pd.get_dummies(g.fyear, drop_first = True)        g= BL.pd.concat((g, years), axis = 1)        ############        dummies = list(years.columns) #[]        to_drop= []        for i in  list(sec.columns) + list(reg.columns):            if g[i].sum() == 0:                to_drop.append(i)            else:                dummies.append(i)        g = g.drop(columns = to_drop)        g[explanators + ['emission', 'IMR' ]] = \            g[explanators + ['emission', 'IMR' ]].apply(BL.scale)        g = g.replace(BL.np.inf, BL.np.nan)        g = g.dropna()                if add_initiatives_robustness:            print('Adding initiatives')              cdpINI = self.get_initiatives_control()            g = g.merge(cdpINI[['Activity_type', 'mrg']])        if add_goals_robustness:            print('Adding SDGs')               goals = self.get_goals_controls()            g = g.merge(goals)        return  g, explanators, other_esg,  dummies    def Emissions(self, g, explanators, other_esg,  dummies, additional_vars=None):        ghg_outcome = BL.pd.DataFrame()        models=[]        if additional_vars == None:            other_esg_ = other_esg        else:            other_esg_=other_esg+additional_vars                            for mediators in [0,1]:            if mediators == 0:                X = g[explanators + ['IMR' , 'system_thinking']+\                      dummies]            else:                X = g[explanators + [ 'IMR' , 'system_thinking']+\                      other_esg_+dummies]            Y = g['emission']            lm = sm.OLS(Y, X).fit(cov = 'H2', cov_type='cluster', cov_kwds={'groups': g.reset_index()['gvkey']})            standardise_params = lm.params*X.std()/Y.std()            results_ = BL.pd.concat((standardise_params, lm.pvalues), axis = 1)            results_ = results_.drop(index = dummies+['IMR']).round(3)            results_.columns = ['coefficient', 'p-value']            idx = results_.index            results_ = BL.pd.DataFrame([str(results_['coefficient'].iloc[i])+self.significance(results_['p-value'].iloc[i]) for i in range(len(results_))], index = list(idx), columns = ['effect'])            models.append(lm)            ghg_outcome = BL.pd.concat((ghg_outcome, results_), axis  =1)        ghg_outcome = ghg_outcome.replace([BL.np.nan], [''])        ghg_outcome.columns = ['Model '+str(i) for i in range(1, len(ghg_outcome.columns)+1)]                ghg_outcome = ghg_outcome.rename(index =  index_dict)        return g, ghg_outcome, models    #############################################################    ###################### Climate Targets ######################    #############################################################    def get_dt_for_paris(self, paris, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, Y):        intensity_scale =  'Total_invested_capital_BS1'        other_esgs = other_esg_tmp.copy()        #=== Take the average system thinking capacities        st = st[(st.Year < Y) ]        sys_thnk = st[['Pred_label']+STcontrols+['gvkey']].groupby('gvkey').mean()        sys_thnk.columns = ['system_thinking']+STcontrols        sys_thnk.loc[:,'text_length'] = sys_thnk['text_length'].apply(BL.np.log)        sys_thnk = sys_thnk.reset_index()        #=== Take the average target setting intensity        internal_tgt = ref_t[(ref_t.fyear < Y)][['gvkey', 'Is_target']].groupby('gvkey').mean()        #=== Take the average financials        comp = comp[(comp.fyear < Y) ][['firm_size', 'MarketLeverage', intensity_scale, 'MTB', 'Tangibility', 'Profitability',                                                                'gvkey']].groupby('gvkey').mean()        #=== Take the average other ESG variables:        ref = ref[(ref.fyear < Y)][['gvkey'] + other_esgs].groupby('gvkey').mean()        #=== Take the average emissions        emissions  =  trucost[(trucost.cyear < Y) ][['gvkey', 'DirectControl']].groupby('gvkey').mean()        #emissions = emissions.apply(BL.np.log).reset_index()        emissions = emissions.reset_index()                #== Merge the datasets        dt = paris.merge(sys_thnk.reset_index(), on = 'gvkey').merge(comp.reset_index()).\            merge(sec.reset_index()).merge(reg.reset_index()).merge(ref.reset_index()).\                merge(emissions)        dt = dt[dt[intensity_scale] > 0]        dt.loc[:,'investment'] = dt.Total_invested_capital_BS1.apply(BL.np.log)        dt.loc[:,'emission_intensity'] = (dt['DirectControl']/dt[intensity_scale]).apply(BL.np.log)        dt = self.filter_outlier(dt, var_to_filter= ['firm_size', 'investment', 'Tangibility', 'emission_intensity'])        dt = dt.dropna()        return dt, other_esgs    def get_Paris_dt(self, paris2018, paris2019, paris2020, st, comp, sec, reg, ref, ref_t, trucost, explanators, other_esg_tmp, base_dummies, ProjectionYear, marginal_=False, years = 'ALL', robustness=False, ratio = 1):        if years == 'ALL':            print('This is the full sample estimate')            dtA, other_esgs = self.get_dt_for_paris(paris2018, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, 2018)            dtB, other_esgs = self.get_dt_for_paris(paris2019, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, 2019)            dtC, other_esgs = self.get_dt_for_paris(paris2020, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, 2020)            dt = BL.pd.concat((dtA, dtB, dtC)).reset_index(drop=True)        elif years < 2018:            return 0,0,0,0,0,0        elif years == 2018:            dtA, other_esgs = self.get_dt_for_paris(paris2018, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, 2018)            dt = dtA.copy()        elif years == 2019:            dtA, other_esgs = self.get_dt_for_paris(paris2018, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, 2018)            dtB, other_esgs = self.get_dt_for_paris(paris2019, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, 2019)            dt = BL.pd.concat((dtA, dtB)).reset_index(drop=True)        elif years == 2020 or years == 'ALL':            print('This is the full sample estimate')            dtA, other_esgs = self.get_dt_for_paris(paris2018, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, 2018)            dtB, other_esgs = self.get_dt_for_paris(paris2019, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, 2019)            dtC, other_esgs = self.get_dt_for_paris(paris2020, st, comp, sec, reg, ref, ref_t, trucost, other_esg_tmp, base_dummies, 2020)            dt = BL.pd.concat((dtA, dtB, dtC)).reset_index(drop=True)        did_it_run = 1        if 2016 in dt.columns:            dt[2016] = dt[2016].replace(BL.np.nan, 0)        if 2015 in dt.columns:            dt[2015] = dt[2015].replace(BL.np.nan, 0)        year_dummies = BL.pd.get_dummies(dt.cyear, drop_first = True)        dt = BL.pd.concat((dt, year_dummies), axis = 1)                #emission_score = Make_Population.MakeUniverse().make_emission_percentile_of_population(trucost, dt)        #print('The average emission of the sample is in the '+str(int(emission_score))+' percentile of the population')        to_drop=[]        dummies = list(year_dummies.columns)        for i in list(sec.columns) + list(reg.columns):            if dt[i].sum() == 0:                to_drop.append(i)            else:                dummies.append(i)        dt = dt.drop(columns = to_drop)        for z in base_dummies:            if z not in dt.columns:                base_dummies.remove(z)        if robustness:            print('robustness test (', ratio, ')')            sample = BL.np.random.choice(dt.gvkey.unique(), int(ratio*len(dt.gvkey.unique())), replace = False)            dt = dt[dt.gvkey.isin(sample)]        if add_initiatives_robustness:            print('Adding initiatives')               cdpINI = self.get_initiatives_control()            dt = dt.merge(cdpINI[['Activity_type', 'mrg']])        if add_goals_robustness:            print('Adding SDGs')               goals = self.get_goals_controls()            dt = dt.merge(goals)        return dt, explanators, other_esgs, dummies, base_dummies, did_it_run        def ParisAgreement(self,dt, explanators, other_esgs, dummies, base_dummies, additional_vars=None ):        full_model = BL.pd.DataFrame()                if additional_vars == None:            other_esgs_ = other_esgs        else:            other_esgs_=other_esgs+additional_vars        for mediators in [0,1]:            if mediators == 0:                X = dt[explanators + [ 'emission_intensity', 'system_thinking']+\                       dummies + base_dummies]            else:                X = dt[explanators + [ 'emission_intensity',  'system_thinking' ]+\                       other_esgs_+ dummies + base_dummies]            Y = dt['alignment']            pr = Probit(Y, X).fit(disp=0)            #=== Standardisation from https://esajournals.onlinelibrary.wiley.com/doi/pdf/10.1002/ecs2.2283            std_Y =pr.predict(X).round().std()/BL.np.sqrt(pr.prsquared)            params_ = pr.params*X.std()/std_Y            res_vector = BL.pd.concat((params_.round(2), pr.pvalues.round(3)), axis = 1)            res_vector.columns = ['coefficient', 'p-value']            res_vector.index = X.columns            st_cntr = list(set(STcontrols) & set(explanators))            res_vector = res_vector.drop(index = dummies + base_dummies)            idx = res_vector.index            res_vector = BL.pd.DataFrame([str(res_vector['coefficient'].iloc[i])+self.significance(res_vector['p-value'].iloc[i]) for i in range(len(res_vector))], index = list(idx), columns = ['effect'])            full_model = BL.pd.concat((full_model, res_vector), axis = 1)            full_model = full_model.replace([BL.np.nan], [''])        full_model=  full_model.rename(index =  index_dict)        full_model.columns = ['Model '+str(i) for i in range(1, len(full_model.columns)+1)]        return dt, full_model                  class RobustnessTest():    def __init__(self):        pass    def iterations(self):        return 2000    def grid(self):        return [0.5,0.6,0.7,0.8,0.9]    def confidence(self):        return 0.05            def get_significance_(self, X):        a = [1 if '*' in X.loc['System Thinking'].iloc[0] else 0][0]        b = [1 if '*' in X.loc['System Thinking'].iloc[1] else 0][0]        coeff_a = float(X.loc['System Thinking'].iloc[0].split('*')[0])        coeff_b = float(X.loc['System Thinking'].iloc[1].split('*')[0])        return a, b,coeff_a, coeff_b    def robustness_to_sample_size_expl(self, compustat, regions, sector, tc, system_thinking, paris2018,paris2019,paris2020, base_dummies, ProjectionYear, ref, vars_, cdp_t, ref_t, cdp_firms, non_cdp_firms):        efx = Analysis()        iterations = self.iterations()        confidence = self.confidence()        relevance_effect = BL.pd.DataFrame( )         g, dummies = efx.get_explanators_dt(cdp_t, ref_t, system_thinking, compustat, sector, regions, ref, tc, vars_,cdp_firms, non_cdp_firms, robustness=False, ratio = 1)        for _ in range(iterations):            print('Iteration', _, 'Explanators')            tmpG = g.iloc[BL.np.random.choice(len(g), int(0.85*len(g)))]            #==            try:                n_obs, expl, relevant_expl = efx.Explanators(tmpG, dummies)                expl = expl['effect'].apply(lambda x: float(str(x).split('*')[0]))                relevance_effect = BL.pd.concat((relevance_effect, expl), axis = 1)            except Exception:                continue        effect = []        for n in relevance_effect.index:            tmp  = list(relevance_effect.loc[n].sort_values(ascending = True))            tmp = [x for x in tmp if str(x) != 'nan']            measured_effect = BL.np.nanmean(tmp)            vec_ = len(tmp)            confidence=0.01            lower_b1 = tmp[int(vec_*0.5*confidence-1)]            upper_b1 = tmp[int((1-0.5*confidence)*vec_)]            confidence=0.05            lower_b5 = tmp[int(vec_*0.5*confidence-1)]            upper_b5 = tmp[int((1-0.5*confidence)*vec_)]            confidence=0.10            lower_b10 = tmp[int(vec_*0.5*confidence-1)]            upper_b10 = tmp[int((1-0.5*confidence)*vec_)]            significance = BL.np.sign(lower_b5*upper_b5)            effect.append([n, measured_effect, lower_b1, upper_b1, lower_b5, upper_b5, lower_b10, upper_b10, significance])        effect = BL.pd.DataFrame(effect, columns = ['factor', 'effect', 'lower_bound_1', 'upper_bound_1', 'lower_bound_5', 'upper_bound_5', 'lower_bound_10', 'upper_bound_10', 'significance']).set_index('factor')        #===        return (effect)       def robustness_to_sample_size_mediators(self, compustat, regions, sector, tc, system_thinking, paris2018,paris2019,paris2020, base_dummies, ProjectionYear, ref,explanators, vars_, cdp_t, ref_t, cdp_firms, non_cdp_firms, MARGINAL):        efx = Analysis()        iterations = self.iterations()        confidence = self.confidence()        relevance_effect = BL.pd.DataFrame( )         if 'sample_size' in explanators: explanators.remove('sample_size')        g, explanators, other_esg,   dummies = efx.get_mediators_dt(cdp_t, ref_t, system_thinking, compustat, sector, regions, ref, tc, explanators,  vars_, marginal_=MARGINAL, robustness=False, ratio = 1)        for _ in range(iterations):            print('Iteration', _, 'Mediators')            #==            tmpG = g.iloc[BL.np.random.choice(len(g), int(0.85*len(g)))]            try:                n_obs, meds, relevant_mediators = efx.mediators(tmpG, explanators, other_esg, dummies)                meds = meds['System Thinking'].apply(lambda x: float(str(x).split('*')[0]))                relevance_effect = BL.pd.concat((relevance_effect, meds), axis = 1)            except Exception:                print('Error occurred')                continue        effect = []        for n in relevance_effect.index:            tmp  = list(relevance_effect.loc[n].sort_values(ascending = True))            tmp = [x for x in tmp if str(x) != 'nan']            measured_effect = BL.np.nanmean(tmp)            vec_ = len(tmp)            confidence=0.01            lower_b1 = tmp[int(vec_*0.5*confidence-1)]            upper_b1 = tmp[int((1-0.5*confidence)*vec_)]            confidence=0.05            lower_b5 = tmp[int(vec_*0.5*confidence-1)]            upper_b5 = tmp[int((1-0.5*confidence)*vec_)]            confidence=0.10            lower_b10 = tmp[int(vec_*0.5*confidence-1)]            upper_b10 = tmp[int((1-0.5*confidence)*vec_)]            significance = BL.np.sign(lower_b5*upper_b5)            effect.append([n, measured_effect, lower_b1, upper_b1, lower_b5, upper_b5, lower_b10, upper_b10, significance])        effect = BL.pd.DataFrame(effect, columns = ['factor', 'effect', 'lower_bound_1', 'upper_bound_1', 'lower_bound_5', 'upper_bound_5', 'lower_bound_10', 'upper_bound_10', 'significance']).set_index('factor')        #===        return (effect)      def step_1_explanators(self, compustat, regions, sector, tc, system_thinking, paris2018,paris2019,paris2020, base_dummies, MAX_ProjectionYear, ref, vars_, cdp_t, ref_t, cdp_firms, non_cdp_firms):        effect = self.robustness_to_sample_size_expl(compustat, regions, sector, tc, system_thinking, paris2018,paris2019,paris2020, base_dummies, MAX_ProjectionYear, ref, vars_, cdp_t, ref_t, cdp_firms, non_cdp_firms)        relevant_explanators_rob = effect[effect.significance > 0]        return effect, relevant_explanators_rob    def step_2_mediators(self, compustat, regions, sector, tc, system_thinking, paris2018,paris2019,paris2020, base_dummies, MAX_ProjectionYear, ref,relevant_explanators_rob, vars_, cdp_t, ref_t, cdp_firms, non_cdp_firms, MARGINAL):        effect = self.robustness_to_sample_size_mediators(compustat, regions, sector, tc, system_thinking, paris2018,paris2019,paris2020, base_dummies, MAX_ProjectionYear, ref, relevant_explanators_rob, vars_, cdp_t, ref_t, cdp_firms, non_cdp_firms, MARGINAL)        relevant_mediators_rob = effect[effect.significance > 0]        return effect, relevant_mediators_rob     def step_3_efx_robustness(self, compustat, regions, sector, tc, system_thinking, paris2018,paris2019,paris2020, base_dummies, ProjectionYear, ref, explanators,vars_, cdp_t, ref_t, cdp_firms, non_cdp_firms, MARGINAL, years = 'ALL'):        efx = Analysis()        iterations = self.iterations()        confidence = self.confidence()        relevance_effect_EM_1 = []        relevance_effect_EM_2 = []        relevance_effect_TG_1 = []        relevance_effect_TG_2 = []        if 'sample_size' in explanators: explanators.remove('sample_size')        if 'sample_size' in vars_: vars_.remove('sample_size')        dt_tg, explanatorsP, other_esgs, dummiesT, base_dummies, did_it_run = efx.get_Paris_dt(paris2018, paris2019, paris2020, system_thinking, compustat, sector, regions, ref, ref_t, tc,explanators, vars_, base_dummies, ProjectionYear, marginal_=MARGINAL,years=years, robustness=False, ratio = 1)              dt_em, explanators, other_esg,  dummiesE = efx.get_emissions_dt(cdp_t, ref_t, system_thinking, compustat, sector, regions, ref, tc, explanators,  vars_, cdp_firms, non_cdp_firms, robustness=False, ratio = 1)        print (did_it_run)        effective_iterations=0        for _ in range(iterations):            print('Iteration', _, 'Outcome')            #==            tmpDTEM = dt_em.iloc[BL.np.random.choice(len(dt_em), int(0.85*len(dt_em)))].reset_index(drop=True)            if did_it_run == 1:                tmpDTTG = dt_tg.iloc[BL.np.random.choice(len(dt_tg), int(0.85*len(dt_tg)))].reset_index(drop=True)            try:                n_obs, ghg, _ = efx.Emissions(tmpDTEM,  explanators, other_esg,  dummiesE)                                ghg1 = str(ghg['Model 1'].loc['System Thinking']).split('*')[0]                relevance_effect_EM_1.append(float(ghg1))                ghg2 = str(ghg['Model 2'].loc['System Thinking']).split('*')[0]                relevance_effect_EM_2.append(float(ghg2))                #===                if did_it_run == 1:                    _, tgt = efx.ParisAgreement(tmpDTTG, explanatorsP, other_esgs, dummiesT, base_dummies)                    tgt1 = str(tgt['Model 1'].loc['System Thinking']).split('*')[0]                    relevance_effect_TG_1.append(float(tgt1))                    tgt2 = str(tgt['Model 2'].loc['System Thinking']).split('*')[0]                    relevance_effect_TG_2.append(float(tgt2))                    effective_iterations+=1            except Exception:                print(traceback.format_exc())                print('ERROR')                continue        effects = []        if did_it_run == 1:            models = [relevance_effect_EM_1, relevance_effect_EM_2, relevance_effect_TG_1, relevance_effect_TG_2]            names = ['Emissions', 'Emissions (wp)', 'Climate targets', 'Climate targets (wp)']        else:            models = [relevance_effect_EM_1, relevance_effect_EM_2]            names = ['Emissions', 'Emissions (wp)']        ###         count=0               for m in models:            tmp  = BL.np.sort(m)            tmp = [x for x in tmp if str(x) != 'nan']            measured_effect = BL.np.nanmean(tmp)            vec_ = len(tmp)            confidence=0.01            lower_b1 = tmp[int(vec_*0.5*confidence-1)]            upper_b1 = tmp[int((1-0.5*confidence)*vec_)]            confidence=0.05            lower_b5 = tmp[int(vec_*0.5*confidence-1)]            upper_b5 = tmp[int((1-0.5*confidence)*vec_)]            confidence=0.10            lower_b10 = tmp[int(vec_*0.5*confidence-1)]            upper_b10 = tmp[int((1-0.5*confidence)*vec_)]            significance = BL.np.sign(lower_b5*upper_b5)            effects.append([names[count], measured_effect, lower_b1, upper_b1, lower_b5, upper_b5, lower_b10, upper_b10, significance])            count+=1        effects = BL.pd.DataFrame(effects, columns = ['factor', 'effect', 'lower_bound_1', 'upper_bound_1', 'lower_bound_5', 'upper_bound_5', 'lower_bound_10', 'upper_bound_10', 'significance']).set_index('factor')        #===        return (effects)             #==========================================#    #=========== Plotting functions ===========#    #==========================================#    def plot_coefficients(self, effect, ax, panel_):        effect = effect.rename(index = abb_index_dict)        effect  = effect.sort_values(by = 'effect')        bar = effect['effect'].iloc[::-1].plot.barh(color = 'teal', alpha = 0.65,  ax = ax)        hatches = ['//']*len(bar.patches)        for i,thisbar in enumerate(bar.patches):            thisbar.set_color('#003366')            thisbar.set_edgecolor('k')        mks = ['*', 'o', 'v']        cnfL = [1,5,10]        for count in [1]:            cnf = cnfL[count]            effect['lower_bound_'+str(cnf)].iloc[::-1].reset_index().plot.scatter(marker = mks[count],   s = 100, color = 'k', x = 'lower_bound_'+str(cnf), y = 'factor', ax = ax)            effect['upper_bound_'+str(cnf)].iloc[::-1].reset_index().plot.scatter(marker = mks[count],   s = 100, color = 'k', x = 'upper_bound_'+str(cnf), y = 'factor', ax = ax)            for i in range(len(effect)):                                  BL.plt.plot([effect['lower_bound_'+str(cnf)].iloc[i], effect['upper_bound_'+str(cnf)].iloc[i]], [effect.index[i], effect.index[i]], color='k', lw = 0.5)        BL.plt.axvline(x = 0, ls = '-.', c = 'k', lw = 1)          BL.plt.xlabel('Standardised coefficients')                BL.plt.text(panel_[0], panel_[1], panel_[2])        BL.plt.ylabel('')                           #============    def plot_FIGURE4PANELA(self, effect, ax, panel_):        colorFULL  = ['#ff6666', '#00aeff', '#004444', '#ff6666', '#00aeff', '#004444']        x = effect.reset_index()        x['factor'] = x['factor'].apply(lambda x: x.split('$')[0])        x['factor'] = ['GHG Emissions', 'GHG Emissions',                        r'Climate Target$_{[<\mathrm{2^{\circ}}]}$', r'Climate Target$_{[<\mathrm{2^{\circ}}]}$',                        r'Climate Target$_{[\ll\mathrm{2^{\circ}}]}$', r'Climate Target$_{[\ll\mathrm{2^{\circ}}]}$',                        ]        x['Type'] = ['W/o Policies', 'With policies', 'W/o Policies', 'With policies', 'W/o Policies', 'With policies']        #BL.sns.barplot(x = 'effect', y = 'factor', hue = 'Type', data = x)        bar = BL.sns.barplot(x = 'effect', y = 'factor', hue = 'Type', data = x, ax = ax, alpha = 0.85)        hatches = ['', '', '', 'o', 'o', 'o']        count = 0        for i,thisbar in enumerate(bar.patches):            thisbar.set_color(colorFULL[count])            thisbar.set_edgecolor('k')            thisbar.set_hatch(hatches[i])            count+=1        mks = ['*', 'o', 'v']        cnfL = [1,5,10]        x.index = [-0.2,0.2,0.8,1.2,1.8,2.2]        for count in [1]:            cnf = cnfL[count]            x[['lower_bound_'+str(cnf)]].reset_index().iloc[::-1].plot.scatter(marker = mks[count],   s = 100, color = 'k', x = 'lower_bound_'+str(cnf), y = 'index', ax = ax)            x[['upper_bound_'+str(cnf)]].reset_index().iloc[::-1].plot.scatter(marker = mks[count],   s = 100, color = 'k', x = 'upper_bound_'+str(cnf), y = 'index', ax = ax)            for i in range(len(effect)):                                  BL.plt.plot([x['lower_bound_'+str(cnf)].iloc[i], x['upper_bound_'+str(cnf)].iloc[i]],                             [x.index[i], x.index[i]], color='k', lw = 0.5)        BL.plt.axvline(x = 0, ls = '-.', c = 'k', lw = 1)          legend_elements = [ Patch(facecolor='gray', edgecolor='k',                                  label='W/o Policies'),                           Patch(facecolor='gray', edgecolor='k', hatch = 'o',                                 label= 'With CSR policies')]        ax.legend(handles=legend_elements)        BL.plt.text(panel_[0], panel_[1], panel_[2])        BL.plt.xlabel('Standardised coefficient of O-ST')        BL.plt.ylabel('')    def plot_time_stability(self, ax, ANNOTATOR = 'Triple'):        emW, ctWB, ctWW = [], [], []        cnf = 5        for year in range(2015,2021):            outcome_effects = BL.pickle.load(open('YearEffect/Robustness_to_sample_size'+ANNOTATOR+'_'+str(year)+'_below.pckl', 'rb'))            emW.append([outcome_effects.loc['Emissions (wp)']['effect'], outcome_effects.loc['Emissions']['lower_bound_'+str(cnf)], outcome_effects.loc['Emissions (wp)']['upper_bound_'+str(cnf)]])            if year >= 2018:                ctWB.append([outcome_effects.loc['Climate targets (wp)']['effect'], outcome_effects.loc['Climate targets']['lower_bound_'+str(cnf)], outcome_effects.loc['Climate targets (wp)']['upper_bound_'+str(cnf)]])                outcome_effects = BL.pickle.load(open('YearEffect/Robustness_to_sample_size'+ANNOTATOR+'_'+str(year)+'_well_below.pckl', 'rb'))                ctWW.append([outcome_effects.loc['Climate targets (wp)']['effect'], outcome_effects.loc['Climate targets']['lower_bound_'+str(cnf)], outcome_effects.loc['Climate targets (wp)']['upper_bound_'+str(cnf)]])        emW = BL.pd.DataFrame(emW, index = [i for i in range(2015,2021)], columns = ['Effect', 'lower_bound_'+str(cnf), 'upper_bound_'+str(cnf)])        emW = emW.loc[:2019]        ctWB = BL.pd.DataFrame(ctWB, index = [i for i in range(2018,2021)], columns = ['Effect', 'lower_bound_'+str(cnf), 'upper_bound_'+str(cnf)])        ctWW = BL.pd.DataFrame(ctWW, index = [i for i in range(2018,2021)], columns = ['Effect', 'lower_bound_'+str(cnf), 'upper_bound_'+str(cnf)])        ctWW = ctWW.replace(BL.np.inf, 0.72)        models = [emW, ctWB, ctWW]        models_names = ['GHG Emissions',                        r'Climate Target$_{[<\mathrm{2^{\circ}}]}$',                        r'Climate Target$_{[\ll\mathrm{2^{\circ}}]}$',                       ]        #==        marker_type = ['o', 's', '^', '>', '*', 'v']        line_style = ['-', '--', '-.', '-.']        colorFULL  = ['#ff6666', '#00aeff', '#004444']        count = 0        for m in models:            m[['Effect']].rename(columns = {'Effect': models_names[count]}).plot(color = colorFULL[count], ls = line_style[count],  marker =marker_type[count], ax = ax, legend = True)            m['lower_bound_'+str(cnf)].plot(ls = '', marker = '_', ms = 20, color = colorFULL[count], ax = ax, legend = False)            m['upper_bound_'+str(cnf)].plot(ls = '', marker = '_', ms = 20, color = colorFULL[count], ax = ax, legend = False)            for i in range(len(m)):                                  BL.plt.plot([m.index[i], m.index[i]], [m['lower_bound_'+str(cnf)].iloc[i], m['upper_bound_'+str(cnf)].iloc[i]], color = colorFULL[count], ls = line_style[count])            count+=1        BL.plt.axhline(y = 0, c = 'k', lw = 0.5)        BL.plt.ylabel('Standardised coefficient of O-ST')        BL.plt.text(2015, 1.94, 'B')                a = BL.pd.DataFrame(models[0])        a = a['Effect'].round(2).astype(str)+' ['+a['lower_bound_5'].astype(str)+','+a['upper_bound_5'].astype(str)+']'        b = BL.pd.DataFrame(models[1])        b = b['Effect'].round(2).astype(str)+' ['+b['lower_bound_5'].astype(str)+','+b['upper_bound_5'].astype(str)+']'        c = BL.pd.DataFrame(models[2])        c = c['Effect'].round(2).astype(str)+' ['+c['lower_bound_5'].astype(str)+','+c['upper_bound_5'].astype(str)+']'        tab = BL.pd.concat((a, b, c), axis = 1).replace(BL.np.nan, '')        tab.columns = ['Emissions$_{_{\mathrm{CSR\ policies}}}$', r'Clim.Trg.$_{_{\mathrm{Below\ 2^{\circ}, CSR\ policies}}}$',r'Clim.Trg.$_{_{\mathrm{Well below\ 2^{\circ}, CSR\ policies}}}$']        return tab